2018 Week 33 - Malaria Data
# Load the three Tidy Tuesday malaria CSV files from the working directory.
# Overall death rates per country and year.
data_deaths <- read_csv(file = "malaria_deaths.csv")
# Death figures split by age group.
data_deaths_age <- read_csv(file = "malaria_deaths_age.csv")
# Incidence per country and year.
data_incidence <- read_csv(file = "malaria_incidence.csv")
# Prevalence / parasite rate (PR) survey points for Kenya (ISO code "KEN"),
# requested with species = "BOTH". Rows with no parasite rate are dropped
# so every remaining survey point can be plotted.
kenya_pr <- filter(
  getPR(ISO = "KEN", species = "BOTH"),
  !is.na(pr)
)

# Quick look at the structure of the filtered survey data.
kenya_pr %>%
  glimpse()
Rows: 1,855
Columns: 28
$ dhs_id <chr> "", "", "", "", "", "", "", "", ""…
$ site_id <int> 13580, 8231, 22331, 16507, 4231, 1…
$ site_name <chr> "Kora Kora", "Ulutya Primary Schoo…
$ latitude <dbl> -0.6097, -0.9724, -3.8442, -1.3149…
$ longitude <dbl> 39.7807, 37.6902, 39.7527, 36.8112…
$ rural_urban <chr> "UNKNOWN", "RURAL", "UNKNOWN", "UR…
$ country <chr> "Kenya", "Kenya", "Kenya", "Kenya"…
$ country_id <chr> "KEN", "KEN", "KEN", "KEN", "KEN",…
$ continent_id <chr> "Africa", "Africa", "Africa", "Afr…
$ month_start <int> 5, 10, 5, 7, 11, 3, 7, 8, 5, 9, 5,…
$ year_start <int> 1994, 2009, 2009, 2009, 2009, 1995…
$ month_end <int> 5, 10, 5, 7, 11, 3, 7, 8, 5, 9, 5,…
$ year_end <int> 1994, 2009, 2009, 2009, 2009, 1995…
$ lower_age <dbl> 0.0, 5.0, 0.6, 4.0, 5.0, 0.0, 0.0,…
$ upper_age <int> 6, 17, 8, 15, 17, 4, 9, 4, 14, 15,…
$ examined <int> 270, 109, 11, 93, 110, 168, 133, 2…
$ positive <dbl> 36, 0, 2, 2, 48, 107, 111, 1, 4, 4…
$ pr <dbl> 0.1333, 0.0000, 0.1818, 0.0215, 0.…
$ species <chr> "P. falciparum", "P. falciparum", …
$ method <chr> "Microscopy", "RDT", "Microscopy",…
$ rdt_type <chr> "", "Paracheck PF - Rapid test for…
$ pcr_type <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ malaria_metrics_available <chr> "true", "true", "true", "true", "t…
$ location_available <chr> "true", "true", "true", "true", "t…
$ permissions_info <chr> "", "", "", "", "", "", "", "", ""…
$ citation1 <chr> " (1994). <i>Vitamin A deficiency …
$ citation2 <chr> "", "Gitonga, CW, Karanja, PN, Kih…
$ citation3 <chr> "", "", "", "", "", "", "", "", ""…
FALSE
1855
# Plot the pooled parasite rate per survey start year.
# Counts are summed across all surveys in a year first, so the rate is
# total positives / total examined rather than a mean of per-survey rates.
kenya_pr %>%
  group_by(year_start) %>%
  dplyr::summarise(
    examined = sum(examined),
    positive = sum(positive),
    studies = n() # number of surveys contributing to each year
  ) %>%
  mutate(pr = positive / examined) %>%
  ggplot(aes(year_start, pr)) +
  geom_line() +
  # Labels fixed: "Prevalance" -> "Prevalence", duplicated "rate" removed,
  # and the package name corrected to malariaAtlas.
  labs(
    title = "Change in Prevalence Rate (Positive/Examined) over the years",
    subtitle = "Prevalence rate decreased over the years",
    x = "Year",
    y = "Prevalence Rate",
    caption = "Source: malariaAtlas package"
  ) +
  theme_few()
# Map every Kenyan survey point, coloured by its parasite rate.
kenya_pr %>%
  # Sort ascending so high-prevalence points are drawn last (on top).
  arrange(pr) %>%
  ggplot(aes(longitude, latitude, col = pr)) +
  geom_point() +
  borders("world", regions = "Kenya") +
  scale_colour_gradient2(
    low = "blue",
    high = "red",
    midpoint = 0.5,
    labels = scales::percent_format()
  ) +
  # Caption fixed to the real package name (malariaAtlas); the legend title
  # is set explicitly so it matches the by-decade version of this map.
  labs(
    title = "Prevalence of Malaria in Kenya",
    caption = "Source: malariaAtlas package",
    col = "Prevalence"
  ) +
  coord_map() +
  theme_void()
Aggregate Prevalence by decade
# Same survey-point map as above, faceted by the decade the survey started.
kenya_pr %>%
  # Derive the decade with mutate() rather than group_by(): no grouped
  # computation follows, so there is no reason to pass a grouped data frame
  # on to arrange() and ggplot().
  mutate(decade = 10 * (year_start %/% 10)) %>%
  # Sort ascending so high-prevalence points are drawn last (on top).
  arrange(pr) %>%
  ggplot(aes(longitude, latitude, col = pr)) +
  geom_point() +
  borders("world", regions = "Kenya") +
  scale_colour_gradient2(
    low = "blue",
    high = "red",
    midpoint = 0.5,
    labels = scales::percent_format()
  ) +
  # Caption fixed to the real package name (malariaAtlas).
  labs(
    title = "Prevalence of Malaria in Kenya, by decade",
    caption = "Source: malariaAtlas package",
    col = "Prevalence"
  ) +
  coord_map() +
  facet_wrap(~decade) +
  theme_void()
Looking at aggregated data
# Inspect the raw incidence table (column names and types).
data_incidence %>%
  glimpse()
Rows: 508
Columns: 4
$ Entity <chr> …
$ Code <chr> …
$ Year <dbl> …
$ `Incidence of malaria (per 1,000 population at risk) (per 1,000 population at risk)` <dbl> …
# Give the incidence table short positional column names, then convert the
# value (reported per 1,000 population at risk, per the raw column name)
# into a proportion.
malaria_inc_processed <- mutate(
  setNames(data_incidence, c("country", "code", "year", "incidence")),
  incidence = incidence / 1000
)
# Incidence over time for six randomly chosen countries/entities.
# The sample is drawn afresh on every run (no seed is set here).
malaria_inc_processed %>%
  filter(country %in% sample(unique(country), size = 6)) %>%
  ggplot(mapping = aes(x = year, y = incidence, colour = country)) +
  geom_line() +
  scale_y_continuous(labels = scales::percent_format()) +
  theme_few()
# One row per country with a column per year (Y2000, Y2005, ...), plus
# the latest value and the change between 2000 and 2015.
malaria_spread <- malaria_inc_processed %>%
  pivot_wider(
    names_from = year,
    names_prefix = "Y", # year columns need a syntactic name
    values_from = incidence
  ) %>%
  mutate(
    current = Y2015,
    change = current - Y2000
  )
# Scatter of 2015 incidence against the 2000 -> 2015 change, labelled by
# country code.
malaria_spread %>%
  filter(
    !is.na(code),        # no country code
    country != "Turkey"  # outlier
  ) %>%
  ggplot(mapping = aes(x = current, y = change)) +
  geom_point() +
  geom_text(mapping = aes(label = code), vjust = 1, hjust = 1) +
  theme_few()
# Which entities in the incidence data have no matching `region` name in
# the world map data?
anti_join(
  malaria_spread,
  map_data("world"),
  by = c("country" = "region")
)
# A tibble: 32 x 8
country code Y2000 Y2005 Y2010 Y2015 current change
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Congo COG 0.364 0.350 0.217 0.173 0.173 -0.190
2 Cote d'Ivoire CIV 0.525 0.531 0.446 0.349 0.349 -0.177
3 Democratic Repu… COD 0.508 0.525 0.427 0.246 0.246 -0.262
4 Early-demograph… <NA> 0.0837 0.0616 0.0475 0.0289 0.0289 -0.0548
5 East Asia & Pac… <NA> 0.0227 0.0207 0.0201 0.00570 0.00570 -0.0170
6 East Asia & Pac… <NA> 0.0228 0.0211 0.0205 0.00580 0.00580 -0.0170
7 East Asia & Pac… <NA> 0.0228 0.0207 0.0202 0.00572 0.00572 -0.0171
8 Fragile and con… <NA> 0.319 0.305 0.247 0.180 0.180 -0.139
9 Heavily indebte… <NA> 0.408 0.326 0.274 0.198 0.198 -0.209
10 IBRD only <NA> 0.0355 0.0383 0.0269 0.0152 0.0152 -0.0203
# … with 22 more rows
# Preview the ISO 3166 lookup shipped with the maps package (a2/a3 codes
# alongside the map name), printed as a tibble.
as_tibble(maps::iso3166)
# A tibble: 269 x 5
a2 a3 ISOname mapname sovereignty
<chr> <chr> <chr> <chr> <chr>
1 AW ABW Aruba Aruba Netherlands
2 AF AFG Afghanistan Afghanistan Afghanistan
3 AO AGO Angola Angola Angola
4 AI AIA Anguilla Anguilla Anguilla
5 AX ALA Aland Islands Finland:Aland Is… Finland
6 AL ALB Albania Albania Albania
7 AD AND Andorra Andorra Andorra
8 AE ARE United Arab Emira… United Arab Emir… United Arab Emira…
9 AR ARG Argentina Argentina Argentina
10 AM ARM Armenia Armenia Armenia
# … with 259 more rows
# World polygon outlines, with Antarctica removed.
world <- filter(map_data("world"), region != "Antarctica")
# Attach map polygons to the incidence data:
#   incidence `code` -> iso3166 `a3` -> iso3166 `mapname` -> world `region`.
# Rows with incidence >= 1 are dropped before joining.
# NOTE(review): some iso3166 `mapname` values (e.g. "Finland:Aland Is...")
# are not plain region names, so those rows presumably fall out of the
# second inner join -- confirm whether any countries are lost here.
data_plot <- malaria_inc_processed %>%
  filter(incidence < 1) %>%
  inner_join(
    select(maps::iso3166, a3, mapname),
    by = c("code" = "a3")
  ) %>%
  inner_join(world, by = c("mapname" = "region"))

# Check the joined result (one row per polygon vertex per country-year).
data_plot %>%
  glimpse()
Rows: 147,970
Columns: 10
$ country <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afgh…
$ code <chr> "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "…
$ year <dbl> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 20…
$ incidence <dbl> 0.1071, 0.1071, 0.1071, 0.1071, 0.1071, 0.1071, 0.…
$ mapname <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afgh…
$ long <dbl> 74.89131, 74.84023, 74.76738, 74.73896, 74.72666, …
$ lat <dbl> 37.23164, 37.22505, 37.24917, 37.28564, 37.29072, …
$ group <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
$ order <int> 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24…
$ subregion <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
# World choropleth of malaria incidence, one panel per year.
data_plot %>%
  ggplot(mapping = aes(x = long, y = lat, group = group, fill = incidence)) +
  geom_polygon() +
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    midpoint = 0.2,
    labels = scales::percent_format()
  ) +
  facet_wrap(~ year) +
  coord_map() +
  labs(
    title = "Malaria incidence over time around the world",
    subtitle = "Malaria incidence had generally decreased over time.",
    fill = "Incidence",
    caption = "Source: malariaAtlas package"
  ) +
  theme_void() +
  # Enlarge and embolden the facet strip labels and the title elements.
  theme(
    strip.text = element_text(face = "bold", size = 14),
    title = element_text(face = "bold", size = 16)
  )
# Inspect the raw deaths table (column names and types).
data_deaths %>%
  glimpse()
Rows: 6,156
Columns: 4
$ Entity <chr> …
$ Code <chr> …
$ Year <dbl> …
$ `Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate) (per 100,000 people)` <dbl> …
# Give the deaths table short positional column names. `deaths` holds the
# age-standardized rate per 100,000 people (per the raw column name).
malaria_deaths_processed <- setNames(
  data_deaths,
  c("country", "code", "year", "deaths")
)

# Confirm the renamed columns.
malaria_deaths_processed %>%
  glimpse()
Rows: 6,156
Columns: 4
$ country <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afghan…
$ code <chr> "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AF…
$ year <dbl> 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998…
$ deaths <dbl> 6.802930, 6.973494, 6.989882, 7.088983, 7.392472, 7.…
https://www.youtube.com/watch?v=5_6O2oDy5Jk&list=PL19ev-r1GBwkuyiwnxoHTRC8TTqP8OEi8&index=77
For attribution, please cite this work as
lruolin (2021, June 20). pRactice corner: Tidy Tuesday Series. Retrieved from https://lruolin.github.io/myBlog/posts/20210620_Tidytuesday malaria data/
BibTeX citation
@misc{lruolin2021tidy, author = {lruolin, }, title = {pRactice corner: Tidy Tuesday Series}, url = {https://lruolin.github.io/myBlog/posts/20210620_Tidytuesday malaria data/}, year = {2021} }